# NOTE(review): star imports hide where names like `preprocess_train` come from
# and risk silent shadowing between `data_process` and `transformers` — prefer
# explicit imports once the needed names are confirmed.
from data_process import *

from transformers import *
from data_loader import writer_tfrecord, read_tf_record
# Module-level side effect: downloads/loads the BERT tokenizer at import time.
# NOTE(review): output path below mentions "roformer" but this is a BERT
# tokenizer — confirm which vocabulary the pipeline actually expects.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def create_similar(similar_fake_num,
                   train_files=("data/train.conll",),
                   max_seq_len=128,
                   do_lower_case=False,
                   dataset_name="demo",
                   split_name="train",
                   output_prefix=None):
    """Tokenize CoNLL training data with fake-label augmentation and write TFRecords.

    Runs ``preprocess_train`` over *train_files* with ``include_fake_label=True``
    so that ``similar_fake_num`` synthetic similar examples are generated, then
    serializes the resulting tensors to TFRecord files via ``writer_tfrecord``.

    Args:
        similar_fake_num: Number of similar fake examples to generate per input
            (passed straight through to ``preprocess_train``); also embedded in
            the default output path so runs with different values don't collide.
        train_files: Iterable of CoNLL file paths to preprocess.
        max_seq_len: Maximum token sequence length for the tokenizer.
        do_lower_case: Lowercasing flag forwarded to ``preprocess_train``.
            # NOTE(review): positional in the original call — confirm the
            # parameter's meaning against preprocess_train's signature.
        dataset_name: Dataset identifier forwarded to ``writer_tfrecord``.
        split_name: Split identifier forwarded to ``writer_tfrecord``.
        output_prefix: Destination path/prefix for the TFRecords. Defaults to
            the original hard-coded GCS bucket path parameterized by
            ``similar_fake_num``.
    """
    if output_prefix is None:
        # Preserve the original default destination (GCS bucket).
        output_prefix = f"gs://ccks2021/ner-lab/roformer-char-full-token-{similar_fake_num}"

    # Fifth return value (presumably raw examples or label map) is unused here.
    input_ids, attention_mask, token_type_ids, labels, _ = preprocess_train(list(train_files),
                                                                            tokenizer,
                                                                            max_seq_len,
                                                                            do_lower_case,
                                                                            include_fake_label=True,
                                                                            similar_fake_num=similar_fake_num)
    writer_tfrecord(dataset_name, split_name, output_prefix, input_ids,
                    attention_mask, token_type_ids, labels)

